---
title: "External Validity: Framework, Design and Analysis"
subtitle: "Data Download and Preparation"
author: "Naoki Egami and Erin Hartman"
date: \today
output:
  html_document:
    df_print: paged
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: echo the code, but keep warnings and
# messages out of the rendered output.
#
# The workspace is deliberately NOT cleared with rm(list = ls()) here: when the
# document is rendered into the calling environment that call deletes the
# user's objects. Every object used below is created by this document itself.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
```

```{r}
library(tidyverse)
library(grf)
library(foreign)
library(lmtest)
library(sandwich)
library(survey)
library(dataverse)
library(readr)
library(readstata13)
```

## System Information
```{r}
# Print the time at which this chunk is evaluated.
print(Sys.time())

# Record a start timestamp (a second, separate call to Sys.time()).
# It is not referenced again in the visible part of this file.
start_time <- Sys.time()

# Print R version, platform and attached/loaded package versions.
print(sessionInfo())
```

## Generate Folders to save data

```{r}
# Create ./generated/data (and its parent ./generated) if it is missing.
# dir.exists() is used instead of file.exists() on a path with a trailing
# slash, which is not reliable for directories on Windows; recursive = TRUE
# creates both levels in one call.
if (!dir.exists("./generated/data")) {
  dir.create("./generated/data", recursive = TRUE)
}
```

## Download Broockman and Kalla (2016) data

```{r}
################################################################################
## Get the Broockman and Kalla (2016) data from dataverse
## located at `https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/WKR39N`.

## Kalla, Joshua; David, Broockman, 2016, "Replication Data for: Durably reducing
## transphobia: a field experiment on door-to-door canvassing",
## https://doi.org/10.7910/DVN/WKR39N, Harvard Dataverse, V3,
## UNF:6:uuwGc6DuPzVlcVKT4RVFvg== [fileUNF]

# Download file `broockman_kalla_replication_data.tab` as a .dta file.
################################################################################
# Fetch the replication file from Dataverse only when no local copy exists;
# later runs reuse the file already on disk.
bk.dta.path <- "./generated/data/broockman_kalla_replication_data.dta"
bk.is.cached <- file.exists(bk.dta.path)
if (!bk.is.cached) {
  bk.download <- get_file_by_name(
    filename = "broockman_kalla_replication_data.tab",
    dataset  = "doi:10.7910/DVN/WKR39N",
    server   = "dataverse.harvard.edu"
  )
  writeBin(bk.download, bk.dta.path)
}

# Load the downloaded file into `data`, the working data frame that the
# recoding chunks below modify in place.
data <- read.dta(bk.dta.path)
```

### Recode data
The code below reproduces the data transformations from the original analysis.  The original code can be found at:
https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/WKR39N/QXCH0H&version=3.0
All code, except the recoding of variables to match the CCES, comes from the original authors.

```{r, results=FALSE}

compute.factor.dv <- function(dv.names, respondent.booleans, print.loadings = TRUE){
  # Build a one-dimensional index from several survey items.
  #
  # Reads the global data frame `data` (columns `id`, `treat_ind`,
  # `miami_trans_law_t0` and the items named in `dv.names`).
  #
  # Args:
  #   dv.names:            character vector of item columns to combine.
  #   respondent.booleans: row selector for `data`; only these rows enter
  #                        princomp().
  #   print.loadings:      if TRUE, print the component loadings.
  #
  # Returns:
  #   A plain vector with one entry per row of `data`; rows whose `id` is not
  #   among the selected respondents are NA. Values are centred and scaled by
  #   the mean and sd of the rows where `treat_ind` is 0.
  resp.rows <- data[respondent.booleans, ]

  # First component of a princomp() fit on the correlation matrix.
  pca.fit <- princomp(resp.rows[, dv.names], cor = TRUE)
  if (print.loadings) {
    print(loadings(pca.fit))
  }
  first.comp <- as.vector(pca.fit$scores[, 1])

  # Orient the score so that it correlates positively with the baseline law
  # item (higher = more tolerant); flip the sign otherwise.
  baseline.cor <- cor(first.comp, resp.rows$miami_trans_law_t0, use = "complete.obs")
  if (baseline.cor < 0) {
    first.comp <- -1 * first.comp
  }

  # Re-align the scores with the row order of the full `data` frame.
  aligned <- first.comp[match(data$id, resp.rows$id)]

  # Standardise against the placebo group so effects read as placebo-group
  # standard deviations.
  placebo.scores <- aligned[!data$treat_ind]
  placebo.mean <- mean(placebo.scores, na.rm = TRUE)
  placebo.sd <- sd(placebo.scores, na.rm = TRUE)
  standardized <- (aligned - placebo.mean) / placebo.sd

  return(as.vector(standardized))
}


# ---- Item-name vectors ------------------------------------------------------
# Each vector below lists column names of `data`; the suffix (_t0 ... _t4)
# identifies the survey wave. These are plain character vectors and are only
# turned into outcomes when passed to compute.factor.dv() later in the file.
# Note the irregular spelling in the raw data: several baseline (t0) items use
# the prefix `gender_norms_` while the follow-up waves use `gender_norm_`.

# Item sets for the omnibus index of all primary outcomes, one per follow-up wave.
all.dv.names.t1 <- c('miami_trans_law_t1', 'miami_trans_law2_t1', 'therm_trans_t1',
                     'gender_norm_sexchange_t1', 'gender_norm_moral_t1',
                     'gender_norm_abnormal_t1', 'gender_norm_trans_moral_wrong_t1')

all.dv.names.t2 <- c('miami_trans_law_t2', 'miami_trans_law2_t2', 'therm_trans_t2',
                     'gender_norm_sexchange_t2', 'gender_norm_moral_t2',
                     'gender_norm_abnormal_t2', 'gender_norm_trans_moral_wrong_t2')

# From t3 on, the law items are the `_withdef_` versions.
all.dv.names.t3 <- c('miami_trans_law_withdef_t3', 'miami_trans_law2_withdef_t3', 
                     'therm_trans_t3', 'gender_norm_sexchange_t3', 'gender_norm_moral_t3',
                     'gender_norm_abnormal_t3','gender_norm_trans_moral_wrong_t3')

all.dv.names.t4 <- c('miami_trans_law_withdef_t4', 'miami_trans_law2_withdef_t4', 
                     'therm_trans_t4', 'gender_norm_sexchange_t4', 'gender_norm_moral_t4',
                     'gender_norm_abnormal_t4', 'gender_norm_trans_moral_wrong_t4')

#### Transgender Tolerance Index
# Note that we code the scale such that larger, more positive values indicate more tolerance and less prejudice.
# Baseline uses four items (with the `gender_norms_` spelling).
trans.tolerance.dvs.t0 <- c('therm_trans_t0', 'gender_norms_sexchange_t0',
                            'gender_norms_moral_t0', 'gender_norms_abnormal_t0')

# t1 adds the `gender_norm_trans_moral_wrong` item.
trans.tolerance.dvs.t1 <- c('therm_trans_t1', 'gender_norm_sexchange_t1',
                            'gender_norm_moral_t1', 'gender_norm_abnormal_t1',
                            'gender_norm_trans_moral_wrong_t1')

# t2 onwards additionally include the `trans_teacher` and `trans_bathroom` items.
trans.tolerance.dvs.t2 <- c('therm_trans_t2', 'gender_norm_sexchange_t2',
                            'gender_norm_moral_t2', 'gender_norm_abnormal_t2',
                            'gender_norm_trans_moral_wrong_t2',
                            'trans_teacher_t2', 'trans_bathroom_t2')

trans.tolerance.dvs.t3 <- c('therm_trans_t3', 'gender_norm_sexchange_t3',
                            'gender_norm_moral_t3', 'gender_norm_abnormal_t3',
                            'gender_norm_trans_moral_wrong_t3',
                            'trans_teacher_t3', 'trans_bathroom_t3')

trans.tolerance.dvs.t4 <- c('therm_trans_t4', 'gender_norm_sexchange_t4',
                            'gender_norm_moral_t4', 'gender_norm_abnormal_t4',
                            'gender_norm_trans_moral_wrong_t4',
                            'trans_teacher_t4', 'trans_bathroom_t4')

# Law items, two per wave.
# NOTE(review): these trans.law.dvs.* vectors are not referenced elsewhere in
# the visible part of this file (the law outcome is built by averaging the
# columns directly) -- confirm whether they are still needed.
trans.law.dvs.t0 <- c('miami_trans_law_t0', 'miami_trans_law2_t0')
trans.law.dvs.t1 <- c('miami_trans_law_t1', 'miami_trans_law2_t1')
trans.law.dvs.t2 <- c('miami_trans_law_t2', 'miami_trans_law2_t2')
# Note: Beginning with t3, the definition was added. 
trans.law.dvs.t3 <- c('miami_trans_law_withdef_t3', 'miami_trans_law2_withdef_t3')
trans.law.dvs.t4 <- c('miami_trans_law_withdef_t4', 'miami_trans_law2_withdef_t4')

# Gender non-conformity items.
gender.nonconformity.t0 <- c('gender_norm_looks_t0', 'gender_norm_rights_t0')
gender.nonconformity.t1 <- c('gender_norm_looks_t1', 'gender_norm_rights_t1')
# Note: Beginning with t2, an additional item was added to the measure. 
gender.nonconformity.t2 <- c('gender_norm_looks_t2', 'gender_norm_rights_t2', 
                             'gender_norm_dress_t2')
gender.nonconformity.t3 <- c('gender_norm_looks_t3', 'gender_norm_rights_t3', 
                             'gender_norm_dress_t3')
gender.nonconformity.t4 <- c('gender_norm_looks_t4', 'gender_norm_rights_t4', 
                             'gender_norm_dress_t4')

# Columns whose sign is flipped (multiplied by -1) right after this definition,
# so that all items point in the same direction before they are combined.
reverse.coded.items <- c('gender_norms_moral_t0', 'gender_norm_moral_t1',
                         'gender_norm_moral_t2', 'gender_norm_moral_t3',
                         'gender_norm_moral_t4', 'gender_norms_abnormal_t0',
                         'gender_norm_abnormal_t1', 'gender_norm_abnormal_t2',
                         'gender_norm_abnormal_t3','gender_norm_abnormal_t4',
                         'gender_norm_trans_moral_wrong_t1', 
                         'gender_norm_trans_moral_wrong_t2',
                         'gender_norm_trans_moral_wrong_t3',
                         'gender_norm_trans_moral_wrong_t4',
                         'trans_bathroom_t2', 'trans_bathroom_t3',
                         'trans_bathroom_t4', 'gender_norm_looks_t0',
                         'gender_norm_looks_t1', 'gender_norm_looks_t2',
                         'gender_norm_looks_t3', 'gender_norm_looks_t4',
                         'gender_norm_rights_t0', 'gender_norm_rights_t1',
                         'gender_norm_rights_t2', 'gender_norm_rights_t3',
                         'gender_norm_rights_t4', 'gender_norm_dress_t2',
                         'gender_norm_dress_t3', 'gender_norm_dress_t4')
# Flip the sign of every reverse-coded item in one pass.
data[reverse.coded.items] <- lapply(
  data[reverse.coded.items],
  function(item.values) -1 * item.values
)

# Age: the few missing values are replaced by the mean of the observed ages.
age.is.missing <- is.na(data$vf_age)
data$vf_age[age.is.missing] <- mean(data$vf_age, na.rm = TRUE)

# Interview language: where the Spanish indicator is missing, first fall back
# to whether the baseline survey language code equals "ES" ...
lang.is.missing <- is.na(data$survey_language_es)
data$survey_language_es[lang.is.missing] <-
  data$survey_language_t0[lang.is.missing] == "ES"

# ... and fill anything still missing with the mean of the indicator.
lang.still.missing <- is.na(data$survey_language_es)
data$survey_language_es[lang.still.missing] <-
  mean(data$survey_language_es, na.rm = TRUE)
```


Recode ideology, religiosity, PID, and age buckets to match those of the CCES.
```{r}
# Collapse the numeric baseline scales into labelled categories, one
# mutate() step per new column. Values that match no condition become NA,
# except for the age bucket, whose final catch-all assigns "d_65plus".
data <- data %>%
  # Ideology: -3/-2 liberal side, -1..1 moderate, 2/3 conservative side.
  mutate(
    ideology_t0_factor = case_when(
      ideology_t0 == -3 ~ "Very liberal",
      ideology_t0 == -2 ~ "Liberal",
      abs(ideology_t0) <= 1 ~ "Moderate",
      ideology_t0 == 2 ~ "Conservative",
      ideology_t0 == 3 ~ "Very conservative"
    )
  ) %>%
  # Religious attendance: cumulative thresholds, evaluated top to bottom.
  mutate(
    religious_t0_factor = case_when(
      religious_t0 < 1 ~ "Never",
      religious_t0 < 2 ~ "Seldom/A few times a year",
      religious_t0 < 4 ~ "Once or twice a month",
      religious_t0 < 5 ~ "Once a week",
      religious_t0 < 6 ~ "More than once a week"
    )
  ) %>%
  # Party identification: seven-point scale from -3 to 3.
  mutate(
    pid_t0_factor = case_when(
      pid_t0 == -3 ~ "Strong Democrat",
      pid_t0 == -2 ~ "Not very strong Democrat",
      pid_t0 == -1 ~ "Lean Democrat",
      pid_t0 == 0 ~ "Independent",
      pid_t0 == 1 ~ "Lean Republican",
      pid_t0 == 2 ~ "Not very strong Republican",
      pid_t0 == 3 ~ "Strong Republican"
    )
  ) %>%
  # Age buckets; the letter prefixes keep the labels in age order when sorted.
  mutate(
    vf_age_bucket = case_when(
      vf_age < 35 ~ "a_18to34",
      vf_age < 50 ~ "b_35to49",
      vf_age < 65 ~ "c_50to64",
      TRUE ~ "d_65plus"
    )
  )
```

Subset to contacted.
```{r}
# Keep a copy of every row, then restrict the working data to contacted
# households (contacted == 1, i.e. someone came to the door).
full_data <- data
data <- subset(data, contacted == 1)

# Row selector for one wave: TRUE where the wave's respondent flag equals 1,
# FALSE where it is anything else or missing.
answered.wave <- function(wave) {
  flag <- data[[paste0("respondent_", wave)]]
  flag == 1 & !is.na(flag)
}

# Omnibus index of all primary outcomes (follow-up waves t1-t4).
# compute.factor.dv() prints the loadings for each call by default.
omnibus.items <- list(t1 = all.dv.names.t1, t2 = all.dv.names.t2,
                      t3 = all.dv.names.t3, t4 = all.dv.names.t4)
for (wave in names(omnibus.items)) {
  data[[paste0("all.dvs.", wave)]] <-
    compute.factor.dv(omnibus.items[[wave]], answered.wave(wave))
}

# Transgender tolerance index (baseline t0 and follow-up waves t1-t4).
tolerance.items <- list(t0 = trans.tolerance.dvs.t0, t1 = trans.tolerance.dvs.t1,
                        t2 = trans.tolerance.dvs.t2, t3 = trans.tolerance.dvs.t3,
                        t4 = trans.tolerance.dvs.t4)
for (wave in names(tolerance.items)) {
  data[[paste0("trans.tolerance.dv.", wave)]] <-
    compute.factor.dv(tolerance.items[[wave]], answered.wave(wave))
}

# Law outcome: simple average of the two law items in each wave.
data$miami_trans_law_t0_avg <- with(data, (miami_trans_law_t0 + miami_trans_law2_t0) / 2)
data$miami_trans_law_t1_avg <- with(data, (miami_trans_law_t1 + miami_trans_law2_t1) / 2)
data$miami_trans_law_t2_avg <- with(data, (miami_trans_law_t2 + miami_trans_law2_t2) / 2)
# From t3 on the items are the `_withdef_` versions (asked with the
# definition); t3 and t4 are both averaged over two items.
data$miami_trans_law_t3_avg <- with(
  data, (miami_trans_law_withdef_t3 + miami_trans_law2_withdef_t3) / 2
)
data$miami_trans_law_t4_avg <- with(
  data, (miami_trans_law_withdef_t4 + miami_trans_law2_withdef_t4) / 2
)

# Gender non-conformity index (t0-t4).
nonconformity.items <- list(t0 = gender.nonconformity.t0, t1 = gender.nonconformity.t1,
                            t2 = gender.nonconformity.t2, t3 = gender.nonconformity.t3,
                            t4 = gender.nonconformity.t4)
for (wave in names(nonconformity.items)) {
  data[[paste0("gender_nonconformity_", wave)]] <-
    compute.factor.dv(nonconformity.items[[wave]], answered.wave(wave))
}

# Treatment delivered: the recorded conversation was "Trans-Equality" and a
# starting rating from the canvass is present.
had.trans.convo <- data$exp_actual_convo == "Trans-Equality"
has.start.rating <- !is.na(data$canvass_trans_ratingstart)
data$treatment.delivered <- as.numeric(had.trans.convo & has.start.rating)
```

Original authors' moderators and residualizing model.
```{r}
# Baseline (t0) covariates used by the original authors to residualize the
# t1 tolerance index. Names are the raw column names of `data`.
t0.covariate.names_original <- c('miami_trans_law_t0', 'miami_trans_law2_t0', 'therm_trans_t0', 
                        'gender_norms_sexchange_t0', 'gender_norms_moral_t0', 'gender_norms_abnormal_t0',
                        'ssm_t0', 'therm_obama_t0', 'therm_gay_t0','vf_democrat', 'ideology_t0', 
                        'religious_t0', 'exposure_gay_t0', 'exposure_trans_t0', 'pid_t0', 'sdo_scale',
                        'gender_norm_daugher_t0', 'gender_norm_looks_t0', 
                        'gender_norm_rights_t0', 'therm_afams_t0', 'vf_female', 'vf_hispanic',
                        'vf_black', 'vf_age', 'survey_language_es', 'cluster_level_t0_scale_mean')

# Covariates as a numeric matrix so they enter lm() as a single term.
for_residualize <- as.matrix(data[, t0.covariate.names_original])

# Residualize the dependent variable, then transform it per (*44*).
t1.resid <- summary(lm(data$trans.tolerance.dv.t1 ~ for_residualize))$residuals

# Map residuals back into data. lm() drops incomplete rows, so t1.resid can be
# shorter than nrow(data). The column is created at full length first:
# index-assigning into a column that does not exist yet builds a vector only
# as long as the largest index, which then errors (or is silently recycled)
# when the last rows of `data` were dropped from the regression.
# NOTE(review): as in the original code, names(t1.resid) are treated as row
# positions within `data` -- confirm this holds for the data at hand.
data$t1.resid <- NA_real_
data$t1.resid[as.numeric(names(t1.resid))] <- t1.resid

# Transformed outcome: residual times (treat_ind - 0.5) / 0.25, i.e. +2 * the
# residual for treat_ind == 1 and -2 * the residual for treat_ind == 0.
data$transformed.outcome <- with(data, t1.resid * (treat_ind - .5) / .25)
```

## Download CCES Data

Load CCES data and recode all the overlapping variables.  Note, using VF variables as self-report in the CCES.


```{r}
################################################################################
## 2016 CCES Common Content, from Dataverse:
## `https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/GDF6Z0`.
##
## Ansolabehere, Stephen; Schaffner, Brian F., 2017, "CCES Common Content, 2016", 
## https://doi.org/10.7910/DVN/GDF6Z0, Harvard Dataverse, V4, 
## UNF:6:WhtR8dNtMzReHC295hA4cg== [fileUNF]
## The file `CCES16_Common_OUTPUT_Feb2018_VV.tab` is stored locally as .dta.
################################################################################
cces.common.path <- "./generated/data/CCES16_Common_Content.dta"
if (!file.exists(cces.common.path)) {
  cces.common.download <- get_file_by_name(
    filename = "CCES16_Common_OUTPUT_Feb2018_VV.tab",
    dataset  = "doi:10.7910/DVN/GDF6Z0",
    server   = "dataverse.harvard.edu"
  )
  writeBin(cces.common.download, cces.common.path)
}

################################################################################
## 2016 CCES Voter Validation supplement, from Dataverse:
## `https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/2NNA4L`.
##
## Enamorado, Ted; Imai, Kosuke, 2018, "CCES 2016 Voter Validation Supplemental Data", 
## https://doi.org/10.7910/DVN/2NNA4L, Harvard Dataverse, V1, 
## UNF:6:cgTs9HySQeCE3AnztRF7Mw== [fileUNF]
## 
## The file `cces2016voterval.tab` is stored locally as .csv.
################################################################################
voterval.path <- "./generated/data/cces2016voterval.csv"
if (!file.exists(voterval.path)) {
  voterval.download <- get_file_by_name(
    filename = "cces2016voterval.tab",
    dataset  = "doi:10.7910/DVN/2NNA4L",
    server   = "dataverse.harvard.edu"
  )
  writeBin(voterval.download, voterval.path)
}

# Read both files and attach the voter-validation columns to the common
# content by respondent key V101, keeping every common-content row. The result
# is converted to a plain data.frame.
cces_2016 <- read.dta13(cces.common.path)
x <- read_csv(voterval.path)

cces_2016 <- as.data.frame(left_join(cces_2016, x, by = "V101"))
```

Recode variables to line up with those in Broockman and Kalla (2016).

```{r}
# Recode CCES respondents onto the variable names and categories used for the
# Broockman-Kalla sample. "Not sure" / "Don't know" / missing answers are
# folded into a middle or fallback category instead of being left NA.
cces_2016 <- cces_2016 %>% mutate(
  # Demographics and turnout, stored under the sample's `vf_*` names.
  vf_age = 2015 - birthyr,
  vf_female = as.numeric(gender == "Female"),
  vf_black = as.numeric(race == "Black"),
  vf_white = as.numeric(race == "White"),
  vf_hispanic = as.numeric(race == "Hispanic"),
  vf_vg_14 = vote2014,
  vf_vg_12 = vote2012,

  # Ideology, categorical: keep the CCES label; unsure/missing -> "Moderate".
  ideology_t0_factor = case_when(
    ideo5 == "Not sure" ~ "Moderate",
    is.na(ideo5) ~ "Moderate",
    TRUE ~ as.character(ideo5)
  ),
  # Ideology, numeric (-3 .. 3). The extreme categories are matched under both
  # capitalisations: the sample-side recode in this file spells them
  # "Very liberal" / "Very conservative" to match the CCES, so testing only
  # "Very Liberal" / "Very Conservative" would presumably never match and
  # would silently code those respondents as 0 via the catch-all.
  ideology_t0 = case_when(
    ideo5 %in% c("Very liberal", "Very Liberal") ~ -3,
    ideo5 == "Liberal" ~ -2,
    ideo5 == "Conservative" ~ 2,
    ideo5 %in% c("Very conservative", "Very Conservative") ~ 3,
    ideo5 == "Not sure" ~ 0,
    is.na(ideo5) ~ 0,
    TRUE ~ 0
  ),

  # Religious attendance, categorical: "Seldom" and "A few times a year" are
  # merged; don't know/missing fall into that merged category as well.
  religious_t0_factor = case_when(
    pew_churatd %in% c("Seldom", "A few times a year") ~ "Seldom/A few times a year",
    pew_churatd == "Don't know" ~ "Seldom/A few times a year",
    is.na(pew_churatd) ~ "Seldom/A few times a year",
    TRUE ~ as.character(pew_churatd)
  ),
  # Religious attendance, numeric; anything unmatched gets 1
  # (described as the "modal prediction" in the original comments).
  religious_t0 = case_when(
    pew_churatd == "Never" ~ 0,
    pew_churatd %in% c("Seldom", "A few times a year") ~ 1,
    pew_churatd == "Once or twice a month" ~ 3,
    pew_churatd == "Once a week" ~ 4,
    pew_churatd == "More than once a week" ~ 5,
    is.na(pew_churatd) ~ 1,
    TRUE ~ 1
  ),

  # Party identification, categorical: unsure/missing -> "Independent".
  pid_t0_factor = case_when(
    pid7 == "Not sure" ~ "Independent",
    is.na(pid7) ~ "Independent",
    TRUE ~ as.character(pid7)
  ),
  # Party identification, numeric (-3 .. 3); unmatched values get 0.
  pid_t0 = case_when(
    pid7 == "Strong Democrat" ~ -3,
    pid7 == "Not very strong Democrat" ~ -2,
    pid7 == "Lean Democrat" ~ -1,
    pid7 == "Independent" ~ 0,
    pid7 == "Lean Republican" ~ 1,
    pid7 == "Not very strong Republican" ~ 2,
    pid7 == "Strong Republican" ~ 3,
    pid7 == "Not sure" ~ 0,
    is.na(pid7) ~ 0,
    TRUE ~ 0
  ),

  # Age buckets with the same cut points as the sample recode; a missing age
  # falls through to "d_65plus".
  vf_age_bucket = case_when(
    vf_age < 35 ~ "a_18to34",
    vf_age < 50 ~ "b_35to49",
    vf_age < 65 ~ "c_50to64",
    TRUE ~ "d_65plus"
  )
)
```

Add factor indicators.
```{r}
make_indicator <- function(data, var, append = FALSE, drop_last = TRUE){
  # Expand one column into 0/1 indicator columns, one per factor level.
  #
  # Args:
  #   data:      data frame (or tibble) containing the column `var`.
  #   var:       name of the column to expand, as a string.
  #   append:    if TRUE, return `data` (with `var` converted to a factor)
  #              plus the indicator columns; if FALSE, return only the
  #              indicator columns as a tibble.
  #   drop_last: if TRUE, omit the indicator of the last factor level.
  #
  # Indicator columns are named "<var>_<level>", with blanks and punctuation
  # in the level replaced by "_". Rows where `var` is NA get NA indicators.

  # [[ ]] extracts a plain vector for both data frames and tibbles
  # (data[, var] returns a one-column tibble when `data` is a tibble).
  data[[var]] <- as.factor(data[[var]])
  var.levels <- levels(data[[var]])

  indicator.list <- lapply(var.levels, function(x) as.integer(data[[var]] == x))
  names(indicator.list) <- paste0(var, "_", gsub("[[:blank:]|[:punct:]]", "_", var.levels))
  # as_tibble() replaces the deprecated as_data_frame().
  cols <- as_tibble(indicator.list)

  # seq_len() rather than 1:(n - 1): with a single level, 1:0 would select
  # column 1 and the "dropped" indicator would be kept.
  if(drop_last) cols <- cols[, seq_len(max(ncol(cols) - 1L, 0L))]

  if(append) {
    return(bind_cols(data, cols))
  } else {
    return(cols)
  }
}

# Categorical variables to expand into indicator columns, in the order in
# which the indicator columns are appended.
indicator.vars <- c("religious_t0_factor", "ideology_t0_factor",
                    "pid_t0_factor", "vf_age_bucket")

# Contacted sample.
for (indicator.var in indicator.vars) {
  data <- make_indicator(data, indicator.var, append = TRUE)
}

# CCES population data.
for (indicator.var in indicator.vars) {
  cces_2016 <- make_indicator(cces_2016, indicator.var, append = TRUE)
}

# Full experimental sample (before subsetting to contacted).
for (indicator.var in indicator.vars) {
  full_data <- make_indicator(full_data, indicator.var, append = TRUE)
}
```

Save Broockman and Kalla (2016) and CCES datasets.
```{r}
# Names under which the two datasets are saved.
sample <- data
pop <- cces_2016

## limit full data to overlapping columns
# All columns whose name contains one of the four categorical variable names.
# The first four matches are dropped by position; presumably these are the
# four source columns themselves, leaving only their indicator columns --
# verify if the column order of full_data ever changes.
indicator.pattern <- "ideology_t0_factor|religious_t0_factor|pid_t0_factor|vf_age_bucket"
matched.cols <- names(full_data)[str_detect(names(full_data), indicator.pattern)]

full_data <- select(
  full_data,
  'vf_age', 'vf_female', 'vf_black', 'vf_white', 'vf_hispanic',
  'vf_vg_14', 'vf_vg_12', 'vf_vg_10', 'contacted',
  'vf_democrat', 'vf_republican',
  matched.cols[-c(1:4)]
)

# Write the three objects to a single .RData file.
save(sample, pop, full_data, file = "./generated/data/recoded_broockman_kalla_data.RData")
```

## Download the Bisgaard (2019) data

```{r}
################################################################################
## Get the Bisgaard (2019) data from dataverse
## located at `https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/FTFJTV`.
##
## Bisgaard, Martin, 2019, "Replication Data for: How Getting the Facts Right Can Fuel Partisan 
## Motivated Reasoning", https://doi.org/10.7910/DVN/FTFJTV, Harvard Dataverse, V1,
## UNF:6:e5b0i5R0vsFibDidAEo4xQ== [fileUNF]

## Download study1.tab, study2.tab, study3.tab, study4.tab all as .RData
################################################################################
# Remote file name on Dataverse -> local path it is written to.
bisgaard.files <- c(
  "study1.tab" = "./generated/data/study1.RData",
  "study2.tab" = "./generated/data/study2.RData",
  "study3.tab" = "./generated/data/study3.RData",
  "study4.tab" = "./generated/data/study4.RData"
)

# Download each study file once; files already on disk are left untouched.
for (remote.name in names(bisgaard.files)) {
  local.path <- bisgaard.files[[remote.name]]
  if (file.exists(local.path)) next

  study.download <- get_file_by_name(
    filename = remote.name,
    dataset  = "doi:10.7910/DVN/FTFJTV",
    server   = "dataverse.harvard.edu"
  )
  writeBin(study.download, local.path)
}
```

## Download the Young (2019) data
```{r}
################################################################################
## Get the Young (2019) data from dataverse
## located at `https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/UNNCTR/VWFHUP`.
##
## Young, Lauren, 2018, "Replication Data for: The psychology of state repression: Fear and
## dissent decisions in Zimbabwe.", https://doi.org/10.7910/DVN/UNNCTR, Harvard Dataverse, V1
## 
## Download the zip file from dataverse and extract.
## Read in 01_Data/01_Survey/round_2_clean.csv and save to object "dat"
################################################################################
# Download, extract and cache the Young (2019) survey data. Everything is
# skipped when the cached .RData file already exists.
if(!file.exists("./generated/data/young_2019.RData")) {
  # mode = "wb" forces a binary transfer. The URL does not end in ".zip", so
  # on Windows download.file() would otherwise use text mode and corrupt the
  # archive.
  download.file(url = "https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/UNNCTR/VWFHUP", 
                destfile = "./generated/data/young.zip",
                mode = "wb")
  
  # Extract into a scratch folder and read the one survey file that is needed.
  unzip("./generated/data/young.zip", exdir = "./generated/data/YoungFiles")
  dat <- read_csv("./generated/data/YoungFiles/ReplicationFiles/01_Data/01_Survey/round_2_clean.csv")

  # Remove the archive and the extracted folder; only the .RData copy is kept.
  file.remove("./generated/data/young.zip")
  unlink("./generated/data/YoungFiles", recursive = TRUE, force = TRUE)
  save(dat, file = "./generated/data/young_2019.RData")
}

```
